We are going to illustrate Principal Component Analysis (PCA), a dimension-reduction technique, using an image. Before that, please note that much of the code that follows was inspired by Aaron Schlegel’s blog post on SVD and PCA.
You must read the data before trying to run the code on your own machine. To read the data, run the following code after setting your working directory. To set your working directory, modify the following call so that it points to the folder where the data file resides: setwd('c:/thatawesomeclass/')
library(jpeg)
# Read the JPEG into an array and take one matrix per colour channel:
# slices 1, 2 and 3 of the third dimension are used as red, green and blue.
pic <- readJPEG("pic.jpg")
r <- pic[, , 1]
g <- pic[, , 2]
b <- pic[, , 3]

# Run PCA on each Color Matrix ----
# center = FALSE: the channel matrices are decomposed without subtracting
# the column means, so a reconstruction never has to add a mean back.
pca_r <- prcomp(r, center = FALSE)
pca_g <- prcomp(g, center = FALSE)
pca_b <- prcomp(b, center = FALSE)
# Keep the three fits together, in red/green/blue order.
pca_rgb <- list(pca_r, pca_g, pca_b)

# First few components explain the bulk of the variance. ----
# Share of each channel's total variance carried by every principal
# component: squared standard deviation divided by the sum of the squared
# standard deviations. `components` is the 1-based component index.
varExplained <- data.frame(
  components = seq_along(pca_r$sdev),
  r = pca_r$sdev^2 / sum(pca_r$sdev^2),
  g = pca_g$sdev^2 / sum(pca_g$sdev^2),
  b = pca_b$sdev^2 / sum(pca_b$sdev^2)
)
# Append the running totals, i.e. the cumulative share reached after the
# first k components of each channel.
varExplained <- cbind(
  varExplained,
  cum_r = cumsum(varExplained$r),
  cum_g = cumsum(varExplained$g),
  cum_b = cumsum(varExplained$b)
)
#varExplained[1:100,]
library(ggplot2); library(dplyr); library(tidyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Cumulative share of variance against the number of components, drawn as
# one panel per colour channel.
varExplained %>%
  select(components, cum_r, cum_g, cum_b) %>%
  # Long format: one row per (component, channel) pair.
  pivot_longer(
    cols = c(cum_r, cum_g, cum_b),
    names_to = "color",
    values_to = "cumulative_variance"
  ) %>%
  ggplot(aes(x = components, y = cumulative_variance, color = color)) +
  geom_point() +
  # Dashed reference line at a cumulative share of 0.99.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept so older installations keep working.
  geom_hline(yintercept = 0.99, size = 0.6, linetype = "dashed") +
  # Unnamed values follow the sorted key names: cum_b, cum_g, cum_r.
  scale_color_manual(values = c("blue", "green", "red")) +
  facet_grid(. ~ color) +
  # The facet strips already name the channel, so the legend is dropped.
  guides(color = "none")

# Reconstruct image from components ----
library(ggmap)
# Rebuild the picture from the first `comp` principal components of every
# colour channel and plot it with an informative title.
#
# comp: number of leading components to keep (looked up in
#       varExplained$components for the title).
# Returns the plot produced by ggimage() with the title and theme added.
# Reads the globals `varExplained` and `pca_rgb`.
compressImage <- function(comp) {
  # Cumulative share of variance (as a percentage) reached at `comp`
  # components, per channel; only used in the title.
  at_comp <- varExplained$components == comp
  r_var <- round(varExplained[at_comp, "cum_r"], 2) * 100
  g_var <- round(varExplained[at_comp, "cum_g"], 2) * 100
  b_var <- round(varExplained[at_comp, "cum_b"], 2) * 100

  # seq_len() + drop = FALSE keep the slices as matrices for any `comp`,
  # including 1 (and 0, which yields an all-zero reconstruction rather than
  # silently falling back to one component as `1:0` would).
  keep <- seq_len(comp)
  compressed_pic_pca <- sapply(pca_rgb, function(img) {
    # scores %*% t(loadings): projection back onto the pixel space.
    img$x[, keep, drop = FALSE] %*% t(img$rotation[, keep, drop = FALSE])
  }, simplify = "array")

  ggimage(compressed_pic_pca, fullpage = FALSE) +
    ggtitle(paste0(
      comp, " Components", ": ",
      r_var, "%R, ", g_var, "%G, ", b_var, "%B"
    )) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.title = element_text(size = 11)
    )
}

# The untouched picture, styled like the reconstructions for comparison.
ggimage(pic, fullpage = FALSE) +
  ggtitle("Original Image") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(size = 11)
  )

# Reconstructions with progressively fewer components.
compressImage(nrow(varExplained))
compressImage(500)
compressImage(100)
compressImage(50)
compressImage(10)
compressImage(9)
compressImage(8)
compressImage(7)
compressImage(6)
compressImage(5)

# All components, 500, 100 / 50, 10, 8 / 7, 6, 5 ----
library(ggmap); library(dplyr)
# Rebuild the picture from the first `comp` principal components of every
# colour channel and draw it full-page, without a title (grid variant).
#
# comp: number of leading components to keep.
# Returns whatever ggimage() returns for the reconstructed array.
# Reads the global `pca_rgb`.
compressImage <- function(comp) {
  # seq_len() + drop = FALSE keep the slices as matrices for any `comp`,
  # including 1 (and 0, which yields an all-zero reconstruction rather than
  # silently falling back to one component as `1:0` would).
  keep <- seq_len(comp)
  compressed_pic_pca <- sapply(pca_rgb, function(img) {
    # scores %*% t(loadings): projection back onto the pixel space.
    img$x[, keep, drop = FALSE] %*% t(img$rotation[, keep, drop = FALSE])
  }, simplify = "array")
  ggimage(compressed_pic_pca, fullpage = TRUE)
}
library(gridExtra)
# One reconstruction per component count, from all components down to 5,
# arranged on a three-column grid.
g1 <- compressImage(nrow(varExplained))
g2 <- compressImage(500)
g3 <- compressImage(100)
g4 <- compressImage(50)
g5 <- compressImage(10)
g6 <- compressImage(8)
g7 <- compressImage(7)
g8 <- compressImage(6)
g9 <- compressImage(5)
grid.arrange(g1, g2, g3, g4, g5, g6, g7, g8, g9, ncol = 3)

# Run SVD on each Color Matrix ----
# Singular value decomposition of each channel matrix; each result carries
# the components `u`, `d` and `v` used by the reconstruction code.
svd_r <- svd(r)
svd_g <- svd(g)
svd_b <- svd(b)
# Keep the three decompositions together, in red/green/blue order.
svd_rgb <- list(svd_r, svd_g, svd_b)

# First few components explain the bulk of the variance. ----
# Share attributed to each singular component, per channel: squared singular
# value divided by the sum of the squared singular values. `components` is
# the 1-based component index.
varExplained_svd <- data.frame(
  components = seq_along(svd_r$d),
  r = svd_r$d^2 / sum(svd_r$d^2),
  g = svd_g$d^2 / sum(svd_g$d^2),
  b = svd_b$d^2 / sum(svd_b$d^2)
)
# Append the running totals, i.e. the cumulative share reached after the
# first k components of each channel.
varExplained_svd <- cbind(
  varExplained_svd,
  cum_r = cumsum(varExplained_svd$r),
  cum_g = cumsum(varExplained_svd$g),
  cum_b = cumsum(varExplained_svd$b)
)
#varExplained_svd[1:100,]
library(ggplot2); library(dplyr); library(tidyr)
# Cumulative share against the number of singular components, drawn as one
# panel per colour channel.
varExplained_svd %>%
  select(components, cum_r, cum_g, cum_b) %>%
  # Long format: one row per (component, channel) pair.
  pivot_longer(
    cols = c(cum_r, cum_g, cum_b),
    names_to = "color",
    values_to = "cumulative_variance"
  ) %>%
  ggplot(aes(x = components, y = cumulative_variance, color = color)) +
  geom_point() +
  # Dashed reference line at a cumulative share of 0.99.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept so older installations keep working.
  geom_hline(yintercept = 0.99, size = 0.6, linetype = "dashed") +
  # Unnamed values follow the sorted key names: cum_b, cum_g, cum_r.
  scale_color_manual(values = c("blue", "green", "red")) +
  facet_grid(. ~ color) +
  # The facet strips already name the channel, so the legend is dropped.
  guides(color = "none")
## Reconstruct Image
Construct Image from Components
library(ggmap)
# Rebuild the picture from the first `comp` singular components of every
# colour channel and plot it with an informative title.
#
# comp: number of leading components to keep (looked up in
#       varExplained_svd$components for the title).
# Returns the plot produced by ggimage() with the title and theme added.
# Reads the globals `varExplained_svd` and `svd_rgb`.
compressImage_svd <- function(comp) {
  # Cumulative share (as a percentage, one decimal) reached at `comp`
  # components, per channel; only used in the title.
  at_comp <- varExplained_svd$components == comp
  r_var <- round(varExplained_svd[at_comp, "cum_r"], 3) * 100
  g_var <- round(varExplained_svd[at_comp, "cum_g"], 3) * 100
  b_var <- round(varExplained_svd[at_comp, "cum_b"], 3) * 100

  keep <- seq_len(comp)
  compressed_pic_svd <- sapply(svd_rgb, function(img) {
    # Truncated product U_k D_k t(V_k). `nrow` is passed explicitly because
    # diag() on a length-one vector would otherwise build an identity matrix
    # of that size instead of a 1 x 1 matrix holding the singular value.
    img$u[, keep, drop = FALSE] %*%
      diag(img$d[keep], nrow = length(keep)) %*%
      t(img$v[, keep, drop = FALSE])
  }, simplify = "array")

  ggimage(compressed_pic_svd, fullpage = FALSE) +
    ggtitle(paste0(
      comp, " Components", ": ",
      r_var, "%R, ", g_var, "%G, ", b_var, "%B"
    )) +
    theme(
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.title = element_text(size = 11)
    )
}

# Reconstructions with progressively fewer components.
compressImage_svd(nrow(varExplained_svd))
compressImage_svd(500)
compressImage_svd(100)
compressImage_svd(50)
compressImage_svd(10)
compressImage_svd(9)
compressImage_svd(8)
compressImage_svd(7)
compressImage_svd(6)
compressImage_svd(5)

# All components, 500, 100 / 50, 10, 8 / 7, 6, 5 ----
library(ggmap)
# Rebuild the picture from the first `comp` singular components of every
# colour channel and draw it full-page, without a title (grid variant).
#
# comp: number of leading components to keep.
# Returns whatever ggimage() returns for the reconstructed array.
# Reads the global `svd_rgb`.
compressImage_svd <- function(comp) {
  keep <- seq_len(comp)
  compressed_pic_svd <- sapply(svd_rgb, function(img) {
    # Truncated product U_k D_k t(V_k). `nrow` is passed explicitly because
    # diag() on a length-one vector would otherwise build an identity matrix
    # of that size instead of a 1 x 1 matrix holding the singular value.
    img$u[, keep, drop = FALSE] %*%
      diag(img$d[keep], nrow = length(keep)) %*%
      t(img$v[, keep, drop = FALSE])
  }, simplify = "array")
  ggimage(compressed_pic_svd, fullpage = TRUE)
}
library(gridExtra)
# One SVD reconstruction per component count, from all components down to 5,
# arranged on a three-column grid. The full count comes from the SVD table
# so this section does not rely on the PCA table.
g1 <- compressImage_svd(nrow(varExplained_svd))
g2 <- compressImage_svd(500)
g3 <- compressImage_svd(100)
g4 <- compressImage_svd(50)
g5 <- compressImage_svd(10)
g6 <- compressImage_svd(8)
g7 <- compressImage_svd(7)
g8 <- compressImage_svd(6)
g9 <- compressImage_svd(5)
grid.arrange(g1, g2, g3, g4, g5, g6, g7, g8, g9, ncol = 3)

# This file was generated using R Version 4.1.2